6 Microbial metagenomics
6.1 Microbial DNA fraction
6.1.1 Data overview
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(sample_type) %>%
summarise(mean=mean(singlem_fraction, na.rm=T),sd=sd(singlem_fraction, na.rm=T)) %>%
tt()| sample_type | mean | sd |
|---|---|---|
| Anal/cloacal swab | 0.2598510 | 0.3407707 |
| Faecal | 0.5203527 | 0.2759433 |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(tax_group) %>%
summarise(mean=mean(singlem_fraction, na.rm=T),sd=sd(singlem_fraction, na.rm=T)) %>%
tt()| tax_group | mean | sd |
|---|---|---|
| Amphibians | 0.5807892 | 0.1799217 |
| Bats | 0.2903304 | 0.2949997 |
| Birds | 0.2777113 | 0.3360460 |
| Mammals | 0.5871579 | 0.2841097 |
| Reptiles | 0.5885336 | 0.1701545 |
6.1.2 Statistical test
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
lm(singlem_fraction ~ sample_type * tax_group, data = .) %>%
anova() %>%
tidy()# A tibble: 4 × 6
term df sumsq meansq statistic p.value
<chr> <int> <dbl> <dbl> <dbl> <dbl>
1 sample_type 1 19.4 19.4 311. 6.69e- 65
2 tax_group 4 35.1 8.78 141. 3.70e-106
3 sample_type:tax_group 4 6.96 1.74 27.9 1.46e- 22
4 Residuals 2015 126. 0.0624 NA NA
6.1.3 Plot
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
mutate(host_percentage= host_bases/bases_post_fastp*100) %>% #convert bases to gigabases (GB)
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=singlem_fraction, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0, 1) +
geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
facet_grid(~sample_type) +
labs(y="Host percentage", color="Taxa", fill="Taxa") +
theme_classic()left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
mutate(host_percentage= host_bases/bases_post_fastp*100) %>% #convert bases to gigabases (GB)
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=singlem_fraction, x=sample_type, group=sample_type)) +
stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
theme_classic() +
labs(y="Host percentage", color="Taxa", fill="Taxa") +
theme_classic()
ggsave("figures/microbialdata_taxa_all.pdf",width=9, height=4, units="in")6.2 Domain-adjusted mapping rate
6.2.1 Data summary
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
filter(singlem_fraction>0) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(sample_type) %>%
summarise(mean=mean(damr, na.rm=T),sd=sd(damr, na.rm=T)) %>%
tt()| sample_type | mean | sd |
|---|---|---|
| Anal/cloacal swab | 72.84533 | 22.25244 |
| Faecal | 86.58154 | 25.00932 |
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
filter(singlem_fraction>0) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(tax_group) %>%
summarise(mean=mean(damr, na.rm=T),sd=sd(damr, na.rm=T)) %>%
tt()| tax_group | mean | sd |
|---|---|---|
| Amphibians | 89.19356 | 24.84575 |
| Bats | 80.94051 | 24.67483 |
| Birds | 67.03815 | 24.99000 |
| Mammals | 91.06092 | 16.77450 |
| Reptiles | 84.52325 | 31.14150 |
6.2.2 Statistical test
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
filter(singlem_fraction>0) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
lm(damr ~ sample_type + tax_group, data = .) %>%
anova() %>%
tidy()# A tibble: 3 × 6
term df sumsq meansq statistic p.value
<chr> <int> <dbl> <dbl> <dbl> <dbl>
1 sample_type 1 51142. 51142. 90.4 4.42e-21
2 tax_group 4 114714. 28679. 50.7 4.21e-41
3 Residuals 2561 1449606. 566. NA NA
6.2.3 Plot
left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(singlem_fraction>0) %>%
ggplot(aes(y=damr, x=sample_type, color=sample_type, fill=sample_type, group=sample_type)) +
geom_boxplot(outlier.shape = NA) +
scale_color_manual(values = c("#bdca50", "#AA3377")) +
scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
theme_classic() +
labs(y="DAMR", color="Taxa", fill="Taxa") +
theme_classic()left_join(read_tsv("data/mapping.tsv"),
read_tsv("data/preprocessing.tsv"),
by="sequence_id") %>%
left_join(read_tsv("data/sample.tsv"), by="sample_id") %>%
mutate(singlem_fraction=ifelse(singlem_fraction>1,100,singlem_fraction*100)) %>%
mutate(damr=ifelse(singlem_fraction<mapping_percentage,100,(mapping_percentage/singlem_fraction)*100)) %>% #convert bases to gigabases (GB)
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(singlem_fraction>0) %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=singlem_fraction, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="DAMR", color="Taxa", fill="Taxa") +
theme_classic()6.3 Assemblies
6.3.1 Data summary
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
group_by(tax_group) %>%
summarise(mean=mean(assembly_length, na.rm=T),sd=sd(assembly_length, na.rm=T)) %>%
tt()| tax_group | mean | sd |
|---|---|---|
| Amphibians | 116614689 | 68762523 |
| Bats | 31069176 | 77144825 |
| Birds | 8688016 | 26409919 |
| Mammals | 96415766 | 69841084 |
| Reptiles | 139349280 | 74190452 |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
group_by(sample_type) %>%
summarise(mean=mean(assembly_length, na.rm=T),sd=sd(assembly_length, na.rm=T)) %>%
tt()| sample_type | mean | sd |
|---|---|---|
| Anal/cloacal swab | 13208042 | 34504175 |
| Faecal | 95955306 | 83010036 |
6.3.2 Statistical test
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
lm(assembly_length ~ sample_type + tax_group, data = .) %>%
anova() %>%
tidy()# A tibble: 3 × 6
term df sumsq meansq statistic p.value
<chr> <int> <dbl> <dbl> <dbl> <dbl>
1 sample_type 1 1.07e18 1.07e18 247. 8.15e- 52
2 tax_group 4 2.97e18 7.43e17 171. 2.10e-121
3 Residuals 1549 6.73e18 4.34e15 NA NA
6.3.3 Plot
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=assembly_length, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0,400000000)+
geom_jitter(alpha = 0.3, width=0.3, size=0.5) +
geom_violin() +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
facet_grid(~sample_type, scale="free") +
labs(y="Assembly size", color="Taxa", fill="Taxa") +
theme_classic()left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=assembly_length, x=sample_type, group=sample_type)) +
ylim(0,400000000)+
stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
theme_classic() +
labs(y="Assembly size", color="Taxa", fill="Taxa") +
theme_classic()
ggsave("figures/assemblysize_taxa_all.pdf",width=9, height=4, units="in")6.4 Number of MAGs
6.4.1 Summary statistics
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
group_by(tax_group) %>%
summarise(mean=mean(assembly_num_bins, na.rm=T),sd=sd(assembly_num_bins, na.rm=T)) %>%
tt()| tax_group | mean | sd |
|---|---|---|
| Amphibians | 23.758389 | 14.944334 |
| Bats | 3.692308 | 7.451098 |
| Birds | 1.582979 | 5.600340 |
| Mammals | 22.062635 | 16.345908 |
| Reptiles | 23.924107 | 13.736366 |
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
group_by(sample_type) %>%
summarise(mean=mean(assembly_num_bins, na.rm=T),sd=sd(assembly_num_bins, na.rm=T)) %>%
tt()| sample_type | mean | sd |
|---|---|---|
| Anal/cloacal swab | 2.644068 | 6.971768 |
| Faecal | 18.386792 | 16.099835 |
6.4.2 Statistical test
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
lm(assembly_num_bins ~ sample_type * tax_group, data = .) %>%
anova() %>%
tidy()# A tibble: 4 × 6
term df sumsq meansq statistic p.value
<chr> <int> <dbl> <dbl> <dbl> <dbl>
1 sample_type 1 38873. 38873. 259. 6.50e- 54
2 tax_group 4 119651. 29913. 199. 1.06e-137
3 sample_type:tax_group 4 13549. 3387. 22.5 4.16e- 18
4 Residuals 1545 232279. 150. NA NA
6.4.3 Plot
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>%
ggplot(aes(y=assembly_num_bins, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0,100) +
geom_jitter(alpha = 0.6, width=0.3, size=0.5) +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
facet_grid(~sample_type, scale="free") +
labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
theme_classic()left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=assembly_num_bins, x=sample_type, group=sample_type)) +
ylim(0,100) +
stat_halfeye(adjust = 1, width = 0.5, .width = 0, justification = 0,normalize = "groups") +
theme_classic() +
labs(y="Number of MAGs", color="Taxa", fill="Taxa") +
theme_classic()6.5 MAG quality
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=mag_completeness, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(50,100) +
#geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5, width = 0.5, .width = 0,normalize = "groups") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="Number of bins", color="Taxa", fill="Taxa") +
theme_classic()left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=mag_contamination, x=tax_group, color=tax_group, fill=tax_group, group=tax_group)) +
ylim(0,10) +
#geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
stat_halfeye(adjust = 0.5, width = 0.5, .width = 0,normalize = "groups") +
scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="Number of bins", color="Taxa", fill="Taxa") +
theme_classic()6.6 Assemblies vs MAGs
left_join(read_tsv("data/preprocessing.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
left_join(read_tsv("data/assembly.tsv"), by="preprocessing_id") %>%
left_join(read_tsv("data/mag.tsv"), by="assembly_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(assembly_type == "Individual") %>%
ggplot(aes(y=assembly_num_bins, x=assembly_length, color=tax_group)) +
geom_point(alpha=0.5, size=0.5) +
scale_color_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
theme_classic() +
labs(y="Number of bins", x="Assembly length", color="Taxa", fill="Taxa")